#!/usr/bin/env python
# Copyright (c) 2011-2012, Universite de Versailles St-Quentin-en-Yvelines
#
# This file is part of ASK.  ASK is free software: you can redistribute
# it and/or modify it under the terms of the GNU General Public
# License as published by the Free Software Foundation, version 2.
#
# This program is distributed in the hope that it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
# FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
# details.
#
# You should have received a copy of the GNU General Public License along with
# this program; if not, write to the Free Software Foundation, Inc., 51
# Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.

"""
This module select n random samples from a set of points in filename
"""
import random

from common.configuration import Configuration
from common.util import fatal


def select_n_random(input_file, output_file, n, n_factors=None):
    """
    Select n random points from input_file and write them to output_file.

    Every line of input_file is treated as one point made of
    whitespace-separated fields.  n lines are drawn without replacement with
    random.sample, and only the first n_factors fields of each drawn line are
    written to output_file, joined by single spaces, one point per line.

    n_factors is optional.  When it is None, the number of fields kept is
    len(conf("factors")), where conf is the module-level configuration object
    bound when this file is run as a script.  Passing n_factors explicitly
    lets the function be called without that global being defined.

    If input_file holds fewer than n lines, fatal() is called with an
    explanatory message.

    This approach is inefficient since it loads the entire file in memory to
    sample it.  If it becomes a bottleneck we should try the approach outlined
    in http://www.bryceboe.com/2009/03/23/random-lines-from-a-file/
    """
    # The context manager closes the input even if reading raises.
    with open(input_file, "r") as inpf:
        lines = inpf.readlines()

    if n > len(lines):
        fatal("Not enough points in {0} to satisfy your sampling size of {1}"
              .format(input_file, n))

    samples = random.sample(lines, n)

    # Resolve the field count once instead of querying the configuration
    # again for every sampled line.
    if n_factors is None:
        n_factors = len(conf("factors"))

    with open(output_file, "w") as outf:
        for s in samples:
            outf.write(" ".join(s.split()[:n_factors]) + "\n")


if __name__ == "__main__":
    # Command-line entry point: read the configuration file named on the
    # command line, draw the random sample it describes and write it to the
    # requested output file.
    import argparse

    arg_parser = argparse.ArgumentParser(
        description=("Select random samples from a set of "
                     "points in the filename"))
    for positional in ('configuration', 'output_file'):
        arg_parser.add_argument(positional)
    cli_args = arg_parser.parse_args()

    # conf has to stay a module-level name: select_n_random looks it up.
    conf = Configuration(cli_args.configuration)

    # Seed the generator only when the configuration provides a seed.
    bootstrap_params = conf("modules.bootstrap.params")
    if "seed" in bootstrap_params:
        seed_value = conf("modules.bootstrap.params.seed", int)
        random.seed(seed_value)

    data_file = conf("modules.bootstrap.params.data_file")
    sample_size = conf("modules.bootstrap.params.n", int)
    select_n_random(data_file, cli_args.output_file, sample_size)
